{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ff3a63c3-3bf5-4624-9dd2-18f7aae3b912",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "“泰坦尼克号”乘客信息数据集\n",
      "    pclass                                             name     sex      age  \\\n",
      "0      1st                     Allen, Miss Elisabeth Walton  female  29.0000   \n",
      "1      1st                      Allison, Miss Helen Loraine  female   2.0000   \n",
      "2      1st              Allison, Mr Hudson Joshua Creighton    male  30.0000   \n",
      "3      1st  Allison, Mrs Hudson J.C. (Bessie Waldo Daniels)  female  25.0000   \n",
      "4      1st                    Allison, Master Hudson Trevor    male   0.9167   \n",
      "..     ...                                              ...     ...      ...   \n",
      "834    3rd                                 Guest, Mr Robert    male      NaN   \n",
      "835    3rd                     Gustafsson, Mr Alfred Ossian    male  20.0000   \n",
      "836    3rd                    Gustafsson, Mr Anders Vilhelm    male  37.0000   \n",
      "837    3rd                      Gustafsson, Mr Johan Birger    male  28.0000   \n",
      "838    3rd                       Gustafsson, Mr Karl Gideon    male  19.0000   \n",
      "\n",
      "        embarked      ticket room  survived  \n",
      "0    Southampton  24160 L221  B-5         1  \n",
      "1    Southampton         NaN  C26         0  \n",
      "2    Southampton         NaN  C26         0  \n",
      "3    Southampton         NaN  C26         0  \n",
      "4    Southampton         NaN  C22         1  \n",
      "..           ...         ...  ...       ...  \n",
      "834  Southampton         NaN  NaN         0  \n",
      "835  Southampton         NaN  NaN         0  \n",
      "836  Southampton         NaN  NaN         0  \n",
      "837  Southampton         NaN  NaN         0  \n",
      "838  Southampton         NaN  NaN         0  \n",
      "\n",
      "[839 rows x 8 columns]\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 839 entries, 0 to 838\n",
      "Data columns (total 8 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   pclass    839 non-null    object \n",
      " 1   name      839 non-null    object \n",
      " 2   sex       839 non-null    object \n",
      " 3   age       633 non-null    float64\n",
      " 4   embarked  821 non-null    object \n",
      " 5   ticket    69 non-null     object \n",
      " 6   room      77 non-null     object \n",
      " 7   survived  839 non-null    int64  \n",
      "dtypes: float64(1), int64(1), object(6)\n",
      "memory usage: 52.6+ KB\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "dataset=pd.read_csv('item8-ss-data-y.csv')\n",
    "print('“泰坦尼克号”乘客信息数据集')\n",
    "print(dataset)\n",
    "dataset.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "576ad508-2abc-4357-8271-0b890c93ab4e",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"['name', 'ticket', 'room'] not found in axis\"",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[6], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m dataset\u001b[38;5;241m.\u001b[39mdrop([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mticket\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mroom\u001b[39m\u001b[38;5;124m'\u001b[39m],inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m      2\u001b[0m dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m=\u001b[39mdataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mfillna(dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmean())\n\u001b[0;32m      3\u001b[0m dataset\u001b[38;5;241m=\u001b[39mdataset\u001b[38;5;241m.\u001b[39mdropna()\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\pandas\\core\\frame.py:5581\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m   5433\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdrop\u001b[39m(\n\u001b[0;32m   5434\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m   5435\u001b[0m     labels: IndexLabel \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   5442\u001b[0m     errors: IgnoreRaise \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   5443\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m   5444\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   5445\u001b[0m \u001b[38;5;124;03m    Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[0;32m   5446\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   5579\u001b[0m \u001b[38;5;124;03m            weight  1.0     0.8\u001b[39;00m\n\u001b[0;32m   5580\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 5581\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdrop(\n\u001b[0;32m   5582\u001b[0m         labels\u001b[38;5;241m=\u001b[39mlabels,\n\u001b[0;32m   5583\u001b[0m         axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[0;32m   5584\u001b[0m         index\u001b[38;5;241m=\u001b[39mindex,\n\u001b[0;32m   5585\u001b[0m         columns\u001b[38;5;241m=\u001b[39mcolumns,\n\u001b[0;32m   5586\u001b[0m         level\u001b[38;5;241m=\u001b[39mlevel,\n\u001b[0;32m   5587\u001b[0m         inplace\u001b[38;5;241m=\u001b[39minplace,\n\u001b[0;32m   5588\u001b[0m         errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[0;32m   5589\u001b[0m     )\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\pandas\\core\\generic.py:4788\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m   4786\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m   4787\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 4788\u001b[0m         obj \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_drop_axis(labels, axis, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[0;32m   4790\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[0;32m   4791\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\pandas\\core\\generic.py:4830\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[1;34m(self, labels, axis, level, errors, only_slice)\u001b[0m\n\u001b[0;32m   4828\u001b[0m         new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[0;32m   4829\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 4830\u001b[0m         new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[0;32m   4831\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[0;32m   4833\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[0;32m   4834\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:7070\u001b[0m, in \u001b[0;36mIndex.drop\u001b[1;34m(self, labels, errors)\u001b[0m\n\u001b[0;32m   7068\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m   7069\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m-> 7070\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlabels[mask]\u001b[38;5;241m.\u001b[39mtolist()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m   7071\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[0;32m   7072\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n",
      "\u001b[1;31mKeyError\u001b[0m: \"['name', 'ticket', 'room'] not found in axis\""
     ]
    }
   ],
   "source": [
    "dataset.drop(['name','ticket','room'],inplace=True,axis=1)\n",
    "dataset['age']=dataset['age'].fillna(dataset['age'].mean())\n",
    "dataset=dataset.dropna()\n",
    "labels=dataset['pclass'].unique().tolist()\n",
    "dataset['pclass']=dataset['pclass'].apply(lambda x:labels.index(x))\n",
    "dataset['sex']=(dataset['sex']=='male').astype(int)\n",
    "labels=dataset['embarked'].unique().tolist()\n",
    "dataset['embarked']=dataset['embarked'].apply(lambda x:labels.index())\n",
    "print(dataset)\n",
    "dataset.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9324b04c-361e-447e-9cfe-1fba9a327a39",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Southampton'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[4], line 11\u001b[0m\n\u001b[0;32m      9\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m200\u001b[39m,\u001b[38;5;241m10\u001b[39m):\n\u001b[0;32m     10\u001b[0m     model\u001b[38;5;241m=\u001b[39mRandomForestClassifier(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,n_estimators\u001b[38;5;241m=\u001b[39mi\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m---> 11\u001b[0m     model\u001b[38;5;241m=\u001b[39mmodel\u001b[38;5;241m.\u001b[39mfit(x_train,y_train)\n\u001b[0;32m     12\u001b[0m     pred\u001b[38;5;241m=\u001b[39mmodel\u001b[38;5;241m.\u001b[39mpredict(x_test)\n\u001b[0;32m     13\u001b[0m     ac\u001b[38;5;241m=\u001b[39maccuracy_score(y_test,pred)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\base.py:1389\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1382\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1384\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1385\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1386\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1387\u001b[0m     )\n\u001b[0;32m   1388\u001b[0m ):\n\u001b[1;32m-> 1389\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\ensemble\\_forest.py:360\u001b[0m, in \u001b[0;36mBaseForest.fit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    357\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(y):\n\u001b[0;32m    358\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msparse multilabel-indicator for y is not supported.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 360\u001b[0m X, y \u001b[38;5;241m=\u001b[39m validate_data(\n\u001b[0;32m    361\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m    362\u001b[0m     X,\n\u001b[0;32m    363\u001b[0m     y,\n\u001b[0;32m    364\u001b[0m     multi_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m    365\u001b[0m     accept_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsc\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    366\u001b[0m     dtype\u001b[38;5;241m=\u001b[39mDTYPE,\n\u001b[0;32m    367\u001b[0m     ensure_all_finite\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m    368\u001b[0m )\n\u001b[0;32m    369\u001b[0m \u001b[38;5;66;03m# _compute_missing_values_in_feature_mask checks if X has missing values and\u001b[39;00m\n\u001b[0;32m    370\u001b[0m \u001b[38;5;66;03m# will raise an error if the underlying tree base estimator can't handle missing\u001b[39;00m\n\u001b[0;32m    371\u001b[0m \u001b[38;5;66;03m# values. Only the criterion is required to determine if the tree supports\u001b[39;00m\n\u001b[0;32m    372\u001b[0m \u001b[38;5;66;03m# missing values.\u001b[39;00m\n\u001b[0;32m    373\u001b[0m estimator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimator)(criterion\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcriterion)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:2961\u001b[0m, in \u001b[0;36mvalidate_data\u001b[1;34m(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)\u001b[0m\n\u001b[0;32m   2959\u001b[0m         y \u001b[38;5;241m=\u001b[39m check_array(y, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_y_params)\n\u001b[0;32m   2960\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2961\u001b[0m         X, y \u001b[38;5;241m=\u001b[39m check_X_y(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m   2962\u001b[0m     out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m   2964\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m check_params\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensure_2d\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m):\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:1370\u001b[0m, in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m   1364\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1365\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m requires y to be passed, but the target y is None\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1366\u001b[0m     )\n\u001b[0;32m   1368\u001b[0m ensure_all_finite \u001b[38;5;241m=\u001b[39m _deprecate_force_all_finite(force_all_finite, ensure_all_finite)\n\u001b[1;32m-> 1370\u001b[0m X \u001b[38;5;241m=\u001b[39m check_array(\n\u001b[0;32m   1371\u001b[0m     X,\n\u001b[0;32m   1372\u001b[0m     accept_sparse\u001b[38;5;241m=\u001b[39maccept_sparse,\n\u001b[0;32m   1373\u001b[0m     accept_large_sparse\u001b[38;5;241m=\u001b[39maccept_large_sparse,\n\u001b[0;32m   1374\u001b[0m     dtype\u001b[38;5;241m=\u001b[39mdtype,\n\u001b[0;32m   1375\u001b[0m     order\u001b[38;5;241m=\u001b[39morder,\n\u001b[0;32m   1376\u001b[0m     copy\u001b[38;5;241m=\u001b[39mcopy,\n\u001b[0;32m   1377\u001b[0m     force_writeable\u001b[38;5;241m=\u001b[39mforce_writeable,\n\u001b[0;32m   1378\u001b[0m     ensure_all_finite\u001b[38;5;241m=\u001b[39mensure_all_finite,\n\u001b[0;32m   1379\u001b[0m     ensure_2d\u001b[38;5;241m=\u001b[39mensure_2d,\n\u001b[0;32m   1380\u001b[0m     allow_nd\u001b[38;5;241m=\u001b[39mallow_nd,\n\u001b[0;32m   1381\u001b[0m     ensure_min_samples\u001b[38;5;241m=\u001b[39mensure_min_samples,\n\u001b[0;32m   1382\u001b[0m     ensure_min_features\u001b[38;5;241m=\u001b[39mensure_min_features,\n\u001b[0;32m   1383\u001b[0m     estimator\u001b[38;5;241m=\u001b[39mestimator,\n\u001b[0;32m   1384\u001b[0m     input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   1385\u001b[0m )\n\u001b[0;32m   1387\u001b[0m y \u001b[38;5;241m=\u001b[39m _check_y(y, multi_output\u001b[38;5;241m=\u001b[39mmulti_output, y_numeric\u001b[38;5;241m=\u001b[39my_numeric, estimator\u001b[38;5;241m=\u001b[39mestimator)\n\u001b[0;32m   1389\u001b[0m check_consistent_length(X, y)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:1055\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m   1053\u001b[0m         array \u001b[38;5;241m=\u001b[39m xp\u001b[38;5;241m.\u001b[39mastype(array, dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m   1054\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1055\u001b[0m         array \u001b[38;5;241m=\u001b[39m _asarray_with_order(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype, xp\u001b[38;5;241m=\u001b[39mxp)\n\u001b[0;32m   1056\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[0;32m   1057\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1058\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[0;32m   1059\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\_array_api.py:839\u001b[0m, in \u001b[0;36m_asarray_with_order\u001b[1;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[0;32m    837\u001b[0m     array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39marray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m    838\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 839\u001b[0m     array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39masarray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m    841\u001b[0m \u001b[38;5;66;03m# At this point array is a NumPy ndarray. We convert it to an array\u001b[39;00m\n\u001b[0;32m    842\u001b[0m \u001b[38;5;66;03m# container that is consistent with the input's namespace.\u001b[39;00m\n\u001b[0;32m    843\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m xp\u001b[38;5;241m.\u001b[39masarray(array)\n",
      "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Southampton'"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "x=dataset.iloc[range(0,821),range(0,4)].values\n",
    "y=dataset.iloc[range(0,821),range(4,5)].values.reshape(1,821)[0]\n",
    "x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,test_size=0.2)\n",
    "score=[]\n",
    "for i in range(0,200,10):\n",
    "    model=RandomForestClassifier(random_state=0,n_estimators=i+1)\n",
    "    model=model.fit(x_train,y_train)\n",
    "    pred=model.predict(x_test)\n",
    "    ac=accuracy_score(y_test,pred)\n",
    "    score.append(ac)\n",
    "print('最大预测准确率为:%f'%max(score))\n",
    "n=score.index(max(score))*10+1\n",
    "print('预测准确率最大的模型对应的参数值为：%.0f'%n)\n",
    "plt.plot(range(1,201,10),score)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e2e5df47-ea18-41de-8e8c-dc88d30b7bc7",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Southampton'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[5], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m classification_report\n\u001b[0;32m      2\u001b[0m model\u001b[38;5;241m=\u001b[39mRandomForestClassifier(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,n_estimators\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m41\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m model\u001b[38;5;241m.\u001b[39mfit(x_train,y_train)\n\u001b[0;32m      4\u001b[0m pred\u001b[38;5;241m=\u001b[39mmodel\u001b[38;5;241m.\u001b[39mpredict(x_test)\n\u001b[0;32m      5\u001b[0m re\u001b[38;5;241m=\u001b[39mclassification_report(y_test,pred)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\base.py:1389\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1382\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1384\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1385\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1386\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1387\u001b[0m     )\n\u001b[0;32m   1388\u001b[0m ):\n\u001b[1;32m-> 1389\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\ensemble\\_forest.py:360\u001b[0m, in \u001b[0;36mBaseForest.fit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    357\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(y):\n\u001b[0;32m    358\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msparse multilabel-indicator for y is not supported.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 360\u001b[0m X, y \u001b[38;5;241m=\u001b[39m validate_data(\n\u001b[0;32m    361\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m    362\u001b[0m     X,\n\u001b[0;32m    363\u001b[0m     y,\n\u001b[0;32m    364\u001b[0m     multi_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m    365\u001b[0m     accept_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsc\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    366\u001b[0m     dtype\u001b[38;5;241m=\u001b[39mDTYPE,\n\u001b[0;32m    367\u001b[0m     ensure_all_finite\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m    368\u001b[0m )\n\u001b[0;32m    369\u001b[0m \u001b[38;5;66;03m# _compute_missing_values_in_feature_mask checks if X has missing values and\u001b[39;00m\n\u001b[0;32m    370\u001b[0m \u001b[38;5;66;03m# will raise an error if the underlying tree base estimator can't handle missing\u001b[39;00m\n\u001b[0;32m    371\u001b[0m \u001b[38;5;66;03m# values. Only the criterion is required to determine if the tree supports\u001b[39;00m\n\u001b[0;32m    372\u001b[0m \u001b[38;5;66;03m# missing values.\u001b[39;00m\n\u001b[0;32m    373\u001b[0m estimator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimator)(criterion\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcriterion)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:2961\u001b[0m, in \u001b[0;36mvalidate_data\u001b[1;34m(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)\u001b[0m\n\u001b[0;32m   2959\u001b[0m         y \u001b[38;5;241m=\u001b[39m check_array(y, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_y_params)\n\u001b[0;32m   2960\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2961\u001b[0m         X, y \u001b[38;5;241m=\u001b[39m check_X_y(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m   2962\u001b[0m     out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m   2964\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m check_params\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensure_2d\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m):\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:1370\u001b[0m, in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m   1364\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1365\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m requires y to be passed, but the target y is None\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1366\u001b[0m     )\n\u001b[0;32m   1368\u001b[0m ensure_all_finite \u001b[38;5;241m=\u001b[39m _deprecate_force_all_finite(force_all_finite, ensure_all_finite)\n\u001b[1;32m-> 1370\u001b[0m X \u001b[38;5;241m=\u001b[39m check_array(\n\u001b[0;32m   1371\u001b[0m     X,\n\u001b[0;32m   1372\u001b[0m     accept_sparse\u001b[38;5;241m=\u001b[39maccept_sparse,\n\u001b[0;32m   1373\u001b[0m     accept_large_sparse\u001b[38;5;241m=\u001b[39maccept_large_sparse,\n\u001b[0;32m   1374\u001b[0m     dtype\u001b[38;5;241m=\u001b[39mdtype,\n\u001b[0;32m   1375\u001b[0m     order\u001b[38;5;241m=\u001b[39morder,\n\u001b[0;32m   1376\u001b[0m     copy\u001b[38;5;241m=\u001b[39mcopy,\n\u001b[0;32m   1377\u001b[0m     force_writeable\u001b[38;5;241m=\u001b[39mforce_writeable,\n\u001b[0;32m   1378\u001b[0m     ensure_all_finite\u001b[38;5;241m=\u001b[39mensure_all_finite,\n\u001b[0;32m   1379\u001b[0m     ensure_2d\u001b[38;5;241m=\u001b[39mensure_2d,\n\u001b[0;32m   1380\u001b[0m     allow_nd\u001b[38;5;241m=\u001b[39mallow_nd,\n\u001b[0;32m   1381\u001b[0m     ensure_min_samples\u001b[38;5;241m=\u001b[39mensure_min_samples,\n\u001b[0;32m   1382\u001b[0m     ensure_min_features\u001b[38;5;241m=\u001b[39mensure_min_features,\n\u001b[0;32m   1383\u001b[0m     estimator\u001b[38;5;241m=\u001b[39mestimator,\n\u001b[0;32m   1384\u001b[0m     input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   1385\u001b[0m )\n\u001b[0;32m   1387\u001b[0m y \u001b[38;5;241m=\u001b[39m _check_y(y, multi_output\u001b[38;5;241m=\u001b[39mmulti_output, y_numeric\u001b[38;5;241m=\u001b[39my_numeric, estimator\u001b[38;5;241m=\u001b[39mestimator)\n\u001b[0;32m   1389\u001b[0m check_consistent_length(X, y)\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\validation.py:1055\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m   1053\u001b[0m         array \u001b[38;5;241m=\u001b[39m xp\u001b[38;5;241m.\u001b[39mastype(array, dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m   1054\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1055\u001b[0m         array \u001b[38;5;241m=\u001b[39m _asarray_with_order(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype, xp\u001b[38;5;241m=\u001b[39mxp)\n\u001b[0;32m   1056\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[0;32m   1057\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1058\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[0;32m   1059\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n",
      "File \u001b[1;32m~\\anaconda33\\Lib\\site-packages\\sklearn\\utils\\_array_api.py:839\u001b[0m, in \u001b[0;36m_asarray_with_order\u001b[1;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[0;32m    837\u001b[0m     array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39marray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m    838\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 839\u001b[0m     array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39masarray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m    841\u001b[0m \u001b[38;5;66;03m# At this point array is a NumPy ndarray. We convert it to an array\u001b[39;00m\n\u001b[0;32m    842\u001b[0m \u001b[38;5;66;03m# container that is consistent with the input's namespace.\u001b[39;00m\n\u001b[0;32m    843\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m xp\u001b[38;5;241m.\u001b[39masarray(array)\n",
      "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Southampton'"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "model=RandomForestClassifier(random_state=0,n_estimators=41)\n",
    "model.fit(x_train,y_train)\n",
    "pred=model.predict(x_test)\n",
    "re=classification_report(y_test,pred)\n",
    "print('模型评估报告：')\n",
    "print(re)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3c8ecc6-b5ca-454a-9603-6dd947bc167e",
   "metadata": {},
   "outputs": [],
   "source": [
    "test=pd.read_csv('item8-ss-test-y.csv')\n",
    "print('需预测数据')\n",
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50e18339-c9a3-4f91-80ae-ab01b088a74a",
   "metadata": {},
   "outputs": [],
   "source": [
    "labels=dataset['pclass'].unique().tolist()\n",
    "dataset['pclass']=dataset['pclass'].apply(lambda x:labels.index(x))\n",
    "dataset['sex']=(dataset['sex']=='male').astype(int)\n",
    "labels=dataset['embarked'].unique().tolist()\n",
    "dataset['embarked']=dataset['embarked'].apply(lambda x:labels.index())\n",
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "773a5e83-99e1-4aa7-a63a-32e8c2ba995a",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_new=test.iloc[range(0,14),range(0,4)].values\n",
    "names=test.iloc[range(0,14),range(4,5)].values\n",
    "pred=model.predict(x_new)\n",
    "for result,name in zip(pred,names):\n",
    "    if result==1:\n",
    "        print(name+\"能够获救\")\n",
    "    else:\n",
    "        print(name+\"不能获救\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
